In [1]:
# imports
%matplotlib inline
# %pylab osx
import os
import tensorflow as tf
import numpy as np

from scipy.misc import imresize
from PIL import Image

import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
# Use the ggplot look for every figure drawn in this notebook.
plt.style.use('ggplot')

# Bit of formatting because I don't like the default inline code style:
# the <style> rule below overrides padding, colours and corner radius for
# `code` elements inside `.rendered_html` containers.
from IPython.core.display import HTML
HTML("""<style> .rendered_html code { 
    padding: 2px 4px;
    color: #c7254e;
    background-color: #f9f2f4;
    border-radius: 4px;
} </style>""")


Painting an image

In [60]:
def crop_square(img):
    """Return the centred square crop of an image array.

    The longer of the first two axes is trimmed so that both equal the
    shorter one; any trailing axes (e.g. colour channels) are left intact.
    When the surplus is odd, the extra row/column is removed from the
    bottom/right edge.

    Parameters
    ----------
    img : array-like with a ``shape`` of at least two dimensions
        Image laid out as (rows, columns[, channels]).

    Returns
    -------
    A slice of ``img`` whose first two dimensions are equal. An already
    square input is returned uncropped.
    """
    height, width = img.shape[0], img.shape[1]
    side = min(height, width)

    # Exactly one of these offsets is non-zero (both are zero for a square
    # input), so a single slice covers the tall, wide and square cases.
    top = (height - side) // 2
    left = (width - side) // 2
    return img[top:top + side, left:left + side]

def crop_center(img, new_height, new_width):
    """Return the centred ``new_height`` x ``new_width`` window of an image.

    Parameters
    ----------
    img : array-like with a ``shape`` of at least two dimensions
        Image laid out as (rows, columns[, channels]).
    new_height, new_width : int
        Requested size of the crop. A request larger than the image along
        an axis returns that axis uncropped.

    Returns
    -------
    A slice of ``img`` covering the centred window.
    """
    height, width = img.shape[0], img.shape[1]
    new_height, new_width = int(new_height), int(new_width)

    # Integer arithmetic keeps the slice bounds valid indices (float bounds
    # are rejected by NumPy) and deriving the far edge from the near edge
    # guarantees the requested size even when the surplus is odd.
    top = max((height - new_height) // 2, 0)
    left = max((width - new_width) // 2, 0)
    return img[top:top + new_height, left:left + new_width]

# Read the two source photos from disk, pass each through crop_square and
# resize the results to 256x256.
# NOTE(review): imresize comes from scipy.misc, which newer SciPy releases
# no longer provide - confirm the SciPy version this notebook is pinned to.
image_files  = ['./images/20130712_190436_1.jpg','./images/20140108_162814.jpg']
img_orig     = [plt.imread(im) for im in image_files] 
img_cropped  = [crop_square(im) for im in img_orig]
imgs_resized = [imresize(im, (256, 256)) for im in img_cropped]

# Wrap the two resized arrays as PIL images so they can be blended.
background   = Image.fromarray(imgs_resized[0])
overlay      = Image.fromarray(imgs_resized[1])

# Blend in RGBA with Image.blend (alpha 0.65), then convert back to RGB.
background   = background.convert("RGBA")
overlay      = overlay.convert("RGBA")
blended      = Image.blend(background, overlay, 0.65).convert("RGB")
img          = np.asarray(blended)

# NOTE(review): this rebinding replaces the blended image assigned just
# above with the first resized photo, so the blend is not what the following
# code sees - confirm which of the two is intended.
img          = imgs_resized[0]

# xs: pixel coordinates, ys: RGB values
# Both arrays are built in one vectorised step instead of appending one
# pixel at a time; rows are ordered row-major, i.e. (0, 0), (0, 1), ...
getting_data = [1,1]
n_rows, n_cols = img.shape[0], img.shape[1]

# np.indices yields a (2, n_rows, n_cols) grid of row and column indices;
# flattening and transposing gives one [row_i, col_i] pair per pixel.
xs = np.indices((n_rows, n_cols)).reshape(2, -1).T

# One entry per pixel, keeping any trailing (channel) axes; np.array makes
# a copy so ys does not alias img.
ys = np.array(img).reshape((n_rows * n_cols,) + img.shape[2:])

# Standardise the coordinates with a single mean/std over both columns.
xs = (xs - np.mean(xs)) / np.std(xs)

# Single-argument call form: prints the same text on Python 2 and Python 3.
print('{} {}'.format(xs.shape, ys.shape))

(65536, 2) (65536, 3)

Define a single layer in tensorflow

A single layer consists of a linear unit plus an activation function. The W values are initialized with random values drawn from a normal distribution, while the biases are initialized to zero. More on variable creation, initialization, saving and loading can be found in the TensorFlow documentation on variables.

In [3]:
# Define a single hidden layer with activation function
# Creating variables with scope (unique name) allows easy chaining of layers
def linear(X, n_input, n_output, activation, scope):
    """Build one fully connected layer: ``activation(X . W + b)``.

    Parameters
    ----------
    X : tensor
        Layer input; its last dimension must equal ``n_input``.
    n_input : int
        Number of input features.
    n_output : int
        Number of units in the layer.
    activation : callable or None
        Applied to the linear output; ``None`` leaves the layer linear.
    scope : str
        Variable scope that namespaces this layer's ``W`` and ``b`` so that
        several layers can be chained without name clashes.

    Returns
    -------
    The output tensor of the layer.
    """
    with tf.variable_scope(scope):
        # Create/return variable with a given scope
        # Weights: drawn from a normal distribution (mean 0, stddev 0.1).
        W = tf.get_variable(
            name='W',
            shape=[n_input, n_output],
            initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
        # Biases: one per output unit, initialised to zero.
        b = tf.get_variable(
            name='b',
            shape=[n_output],
            initializer=tf.constant_initializer(0.0))
        h = tf.matmul(X, W) + b
        if activation is not None:
            h = activation(h)
        return h

Create the deep network

The network is built by chaining layers together, starting with the input (a stream of (x, y) values) and ending with the output (the corresponding stream of RGB values).

In [63]:
g  = tf.get_default_graph()

# in: 2 (x,y) pixel coordinates, out: 3 (R,G,B) values
n_neurons = [2, 64, 64, 64, 64, 64, 64, 64, 3]

X = tf.placeholder(tf.float32, shape=[None, 2], name='X')
Y = tf.placeholder(tf.float32, shape=[None, 3], name='Y')

# Chain the layers: each one consumes the previous layer's output. Every
# layer uses ReLU except the last, which stays linear so that the
# predictions are not clipped by the activation.
current_input = X
for layer_i in range(1, len(n_neurons)):
    current_input = linear(
        X=current_input,
        n_input=n_neurons[layer_i - 1],
        n_output=n_neurons[layer_i],
        activation=tf.nn.relu if (layer_i+1) < len(n_neurons) else None,
        scope='layer_' + str(layer_i))
Y_pred = current_input

Define a cost function

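The cost function defined below is the mean, over all pixels, of the per-pixel distance between the known RGB values (Y) and the RGB values predicted by the network (Y_pred), summed over the three colour channels. Two distances are defined, L1 (absolute difference) and L2 (squared difference); the code below selects the L2 distance.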

In [64]:
# Element-wise distance functions (L1 and L2)
def distance_l1(p1, p2):
    """Element-wise L1 distance: the absolute value of ``p1 - p2``."""
    difference = p1 - p2
    return tf.abs(difference)
def distance_l2(p1, p2):
    """Element-wise squared difference: ``(p1 - p2)`` raised to the power 2."""
    difference = p1 - p2
    return tf.pow(difference, 2)

# Select the distance used by the cost (distance_l1 or distance_l2).
distance  = distance_l2

# cost: mean over pixels of the per-pixel distance summed over the RGB
# channels. With distance_l2 selected these are squared differences, not
# absolute values.
per_pixel_distance = tf.reduce_sum(distance(Y_pred, Y), 1)
cost      = tf.reduce_mean(per_pixel_distance)

# Shape check. It reuses per_pixel_distance so no duplicate ops are added to
# the graph, and the single-argument call form prints the same text on
# Python 2 and Python 3.
print('{} {} {} {}'.format(Y.get_shape(), Y_pred.get_shape(),
                           per_pixel_distance.get_shape(), cost.get_shape()))

(?, 3) (?, 3) (?,) ()


Train using a specific number of iterations, batch size and learning rate

In [66]:
n_iterations  = 2000
batch_size    = 50
learning_rate = 0.0005

imgs          = []   # frames kept for the video clip
costs         = []   # full-image cost after every iteration
gif_step      = 50   # progress is reported every gif_step iterations

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
with tf.Session() as sess:
    # Initialize all tf variables using specified initializers.
    # global_variables_initializer superseded initialize_all_variables;
    # fall back to the older name when the newer one is unavailable.
    init_op = getattr(tf, 'global_variables_initializer', None) or tf.initialize_all_variables
    sess.run(init_op())

    # We now run a loop over epochs
    prev_training_cost = -1.0
    for it_i in range(n_iterations):
        # Visit the pixels in a fresh random order, one mini-batch at a time.
        idxs = np.random.permutation(range(len(xs)))
        n_batches = len(idxs) // batch_size
        for batch_i in range(n_batches):
            idxs_i = idxs[batch_i * batch_size: (batch_i + 1) * batch_size]
            sess.run(optimizer,
                     feed_dict={X: xs[idxs_i], Y: ys[idxs_i]})

        # Cost over the whole image after this pass.
        training_cost = sess.run(cost, feed_dict={X: xs, Y: ys})
        costs.append(training_cost)

        # Render and keep a frame only when the cost improved (or on the
        # very first iteration, flagged by the negative sentinel).
        store_img = False
        if prev_training_cost < 0.0 or training_cost < prev_training_cost:
            print('Iteration: {} , cost: {}'.format(it_i, training_cost))
            store_img = True
            ys_pred   = Y_pred.eval(feed_dict={X: xs}, session=sess)
            img       = np.clip(ys_pred.reshape(img.shape), 0, 255).astype(np.uint8)
            prev_training_cost = training_cost

        if store_img:
            imgs.append(img)

        if (it_i + 1) % gif_step == 0:
            print('Iteration: {}'.format(it_i))

Make a video clip

In [67]:
import moviepy.editor as mpy
# NOTE(review): this module-level counter is not read by the visible code;
# MakeClip below keeps its own counter in self.i - confirm it is still needed.
i = -1

class MakeClip(object):
    """Frame source that hands out stored images one per call, cycling.

    ``make_frame`` has the ``f(t) -> frame`` signature and is passed to
    ``mpy.VideoClip`` as the frame function.
    """

    def __init__(self, imgs):
        # imgs: sequence of frames to cycle through
        self.imgs = imgs
        # index of the most recently returned frame; -1 means none yet
        self.i    = -1

    def make_frame(self, t):
        """Return the next stored frame, wrapping to the first after the last.

        ``t`` is only printed; frames are handed out in call order regardless
        of its value.
        """
        # Single-argument call form: prints the same text on Python 2 and 3.
        print(' t=  {}'.format(t))
        self.i += 1
        if self.i >= len(self.imgs):
            self.i = 0
        return self.imgs[self.i]

# Single-argument call form: prints the same text on Python 2 and Python 3.
print('N images in clip:  {}'.format(len(imgs)))

# clip1 pulls frames on demand through MakeClip.make_frame.
make_clip = MakeClip(imgs)
clip1      = mpy.VideoClip(make_clip.make_frame, duration=2) # 2 seconds

# clip2 is built directly from the stored frames at 100 frames per second.
clip2 = mpy.ImageSequenceClip(imgs, fps=100)

